#!/pro/bin/perl

use strict;
use warnings;

# csvdiff: Show differences between CSV files
#	   (m)'19 [05 Mar 2019] Copyright H.M.Brand 2009-2021

our $VERSION = "1.02 - 20190305";

sub usage {
    my $err = shift and select STDERR;
    print "usage: csvdiff [--no-color] [--html] [-w|-b|-Z] file.csv file.csv\n",
	"  provides colorized diff on sorted CSV files\n",
	"  assuming first line is header and first field is the key\n",
	"          --no-color               do not use colors\n",
	"    -h    --html                   produce HTML output\n",
	"    -w    --ignore-all-space       ignore all whistespace is all fields\n",
	"    -b    --ignore-space-change    ignore whitespace length changes\n",
	"    -Z    --ignore-trailing-space  ignore trailing whitespace per field\n",
	"    -o F  --output=F               send output to file F\n";
    exit $err;
    } # usage

use Getopt::Long qw(:config bundling nopermute );
my $opt_c = !$ENV{NO_COLOR};
GetOptions (
    "help|?"		=> sub { usage (0); },
    "V|version"		=> sub { print "csvdiff [$VERSION]\n"; exit 0 },

    "w|ignore-all-space!"		=> \my $opt_w,
    "b|ignore-ws|ignore-space-change!"	=> \my $opt_b,
    "Z|ignore-trailing-space!"		=> \my $opt_Z,

    "c|color|colour!"			=> \   $opt_c,
    "h|html"				=> \my $opt_h,

    "o|output=s"			=> \my $opt_o,
    ) or usage (1);

@ARGV == 2 or usage (1);

if ($opt_o) {
    open STDOUT, ">", $opt_o or die "$opt_o: $!\n";
    }

use HTML::Entities;
use Term::ANSIColor qw(:constants);
use Text::CSV_XS;
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 0 });

if ($opt_h) {
    binmode STDOUT, ":encoding(utf-8)";
    print <<EOH;
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
  <title>CFI School updates</title>
  <meta name="Generator"     content="perl $]" />
  <meta name="Author"        content="@{[scalar getpwuid $<]}" />
  <meta name="Description"   content="CSV diff @ARGV" />
  <style type="text/css">
    .rd { background:	#ffe0e0;	}
    .gr { background:	#e0ffe0;	}
    .hd { background:	#e0e0ff;	}
    .b0 { background:	#e0e0e0;	}
    .b1 { background:	#f0f0f0;	}
    .r  { color:	red;		}
    .g  { color:	green;		}
    </style>
  </head>
<body>

<h1>CSV diff @ARGV</h1>

<table>
EOH
    $::{RED}	= sub { "\cA\rr";	};
    $::{GREEN}	= sub { "\cA\rg";	};
    $::{RESET}	= sub { "";		};
    }
elsif (!$opt_c) {
    $::{$_} = sub { "" } for qw( RED GREEN RESET );
    }

my @f;
my $opt_n = 1;
foreach my $x (0, 1) {
    open my $fh, "<", $ARGV[$x] or die "$ARGV[$x]: $!\n";
    my $n = 0;
    while (1) {
	my $row = $csv->getline ($fh) or last;
	@$row and push @{$f[$x]}, $row;
	$n++ && $row->[0] =~ m/\D/ and $opt_n = 0;
	}
    }
my @n   = map { $#{$f[$_]} } 0, 1;
my @i   = (1, 1);
my $hdr = "# csvdiff   < $ARGV[0]    > $ARGV[1]\n";

$f[$_][1+$n[$_]][0] = $opt_n ? 2147483647 : "\xff\xff\xff\xff" for 0, 1;

my %cls;
   %cls = (
    "b" => 0,
    "-"	=> sub { "rd" },
    "+"	=> sub { "gr" },
    "H"	=> sub { "hd" },
    "<"	=> sub { $cls{b} ^= 1; "b$cls{b}" },
    ">"	=> sub { "b$cls{b}" },
    );

sub show {
    my ($pfx, $x) = @_;
    my $row = $f[$x][$i[$x]++] or return;

    if ($opt_h) {
	my $bg = $cls{$pfx}->();
	print qq{  <tr class="$bg">},
	    (map{"<td".(s/^\cA\r([gr])//?qq{ class="$1"}:"").">$_</td>"}@$row),
	    "</tr>\n";
	return;
	}

    print $hdr, $pfx, " ", $pfx eq "-" ? RED : $pfx eq "+" ? GREEN : "";
    $csv->print (*STDOUT, $row);
    print RESET, "\n";
    $hdr = "";
    } # show

# Skip first line of both are same: it probably is a header
my @h0 = @{$f[0][0]};
my @h1 = @{$f[1][0]};
if ("@h0" eq "@h1") {
    if ($opt_h) {
    	$i[0]--;
    	show ("H", 0);
    	}
    shift @{$f[0]};
    shift @{$f[1]};
    }

my $x = 0;
while ($i[0] <= $n[0] || $i[1] <= $n[1]) {
    my @r0 = @{$f[0][$i[0]]};
    my @r1 = @{$f[1][$i[1]]};

    if ($opt_n) {
	$r0[0] <  $r1[0] and show ("-", 0), next;
	$r0[0] >  $r1[0] and show ("+", 1), next;
	}
    else {
	$r0[0] lt $r1[0] and show ("-", 0), next;
	$r0[0] gt $r1[0] and show ("+", 1), next;
	}

    my @v0 = @r0;
    my @v1 = @r1;
    $opt_Z and s/[\r\n\s]+\z// for @v0, @v1;
    $opt_b and s/[\r\n\s]+/ /g for @v0, @v1;
    $opt_w and s/[\r\n\s]+//g  for @v0, @v1;

    {	local $" = "\xFF";
	"@v0" eq "@v1" and $i[0]++, $i[1]++, next;
	}

    foreach my $c (1 .. $#h0) {
	my ($L, $R) = map { $_ // "" } $r0[$c], $r1[$c];
	$L eq $R and next;
	$f[0][$i[0]][$c] = RED   . $L . RESET;
	$f[1][$i[1]][$c] = GREEN . $R . RESET;
	}

    show ("<", 0);
    show (">", 1);
    }

$opt_h and print "  </table>\n</body>\n</html>\n";

close STDOUT;
